---
title: "R Notebook"
output: html_notebook
---

This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code. 

Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Cmd+Shift+Enter*. 

```{r}

# Use this code (from the devtools package) to source C-3PR directly from GitHub:
require(devtools)
#install.packages('devtools')
#install.packages('RCurl')
library(devtools)
source_url('https://raw.githubusercontent.com/FredHasselman/toolboxR/master/C-3PR.R')

# This will load and (if necessary) install libraries frequently used for data management and plotting
in.IT(c('ggplot2','RColorBrewer','lattice','gridExtra','plyr','dplyr','httr'))

# Read the data from the OSF storage
# Note: get.OSFfile() returns a list with the .csv data (df) and information (info) containing the URL download timestamp and original column and rownames (these names will be changed if dfCln=TRUE).  
RPPdata <- get.OSFfile(code='https://osf.io/fgjvw/',dfCln=T)$df

## If you dowloaded the csv file to your harddrive use this code:
#  RPPdata<-read.csv('rpp_data.csv',stringsAsFactors=F )
#  RPPdata<-df.Clean(RPPdata)
#  RPPdata<-RPPdata$df

# Select the completed replication studies
RPPdata <- dplyr::filter(RPPdata, !is.na(T.pval.USE.O),!is.na(T.pval.USE.R))
# We have 99 studies for which p-values and effect sizes could be calculated
nrow(RPPdata)
# We have 97 studies for which p-values of the original effect were significantly below .05
idOK <- complete.cases(RPPdata$T.r.O,RPPdata$T.r.R)
sum(idOK)

# Get ggplot2 themes predefined in C-3PR
mytheme <- gg.theme("clean")
mypalette        <- brewer.pal(9,"YlOrRd")
```

```{r}
## CALCULATING PROPORTION REPLICATED IN DIFFERENT P-VALUE WINDOWS

raw_origP = RPPdata$T.pval.USE.O
length(raw_origP)
raw_replicP = RPPdata$T.pval.USE.R

# Proportion studies replicated when original p value <= 0.005
id = which(raw_origP <= 0.005)
origP = raw_origP[id]
replicP = raw_replicP[id]
1-sum(replicP<0.005)/length(origP) # failure rate at alpha .005
1-sum(replicP<0.05)/length(origP) # failure rate at alpha .05
X1 = sum(replicP<0.05)
N1 = length(origP)
X2 = sum(replicP<0.005)
N2 = length(origP)

# Proportion studies replicated in original p value band 0.005 to 0.05
id = which(raw_origP> 0.005 & raw_origP <= 0.05)
origP = raw_origP[id]
replicP = raw_replicP[id]
1-sum(replicP<0.05)/length(origP) # failure rate at alpha .05
1-sum(replicP<0.005)/length(origP) # failure rate at alpha .005
X3 = sum(replicP<0.05)
N3 = length(origP)
X4 = sum(replicP<0.005)
N4 = length(origP)

# Proportion studies replicated in original p value band 0.001 to 0.005
id = which(raw_origP>= 0.001 & raw_origP < 0.005)
origP = raw_origP[id]
replicP = raw_replicP[id]
1-sum(replicP<0.05)/length(origP) # failure rate at alpha .05
1-sum(replicP<0.005)/length(origP) # failure rate at alpha .005
X5 = sum(replicP<0.05)
N5 = length(origP)
X6 = sum(replicP<0.005)
N6 = length(origP)

# Proportion studies replicated in original p value band 0.0001 to 0.005
id = which(raw_origP>= 0.0001 & raw_origP < 0.005)
origP = raw_origP[id]
replicP = raw_replicP[id]
1-sum(replicP<0.05)/length(origP) # failure rate at alpha .05
1-sum(replicP<0.005)/length(origP) # failure rate at alpha .005
X7 = sum(replicP<0.05)
N7 = length(origP)
X8 = sum(replicP<0.005)
N8 = length(origP)


# Proportion studies replicated for original p < .0001
id = which(raw_origP< 0.0001)
origP = raw_origP[id]
replicP = raw_replicP[id]
1-sum(replicP<0.05)/length(origP) # failure rate at alpha .05
1-sum(replicP<0.005)/length(origP) # failure rate at alpha .005
X9 = sum(replicP<0.05)
N9 = length(origP)
X10 = sum(replicP<0.005)
N10 = length(origP)

# Calculate binomial confidence intervals
#install.packages('prevalence')
#install.packages('rjags')
# You will need to install JAGS. Here are the files for MacOS: https://sourceforge.net/projects/mcmc-jags/files/JAGS/4.x/Mac%20OS%20X/
library(rjags)
library(prevalence)

# Test if proportion replicated (at alpha = .05) are different for original p<=.005 vs .005<orig_p<=.05.
props <- propCI(x = c(X1,X3), n = c(N1,N3)) 
jeffCIs = props[props$method=='jeffreys',c('lower','upper')]
library(MASS)
prop.test(c(X1,X3), c(N1,N3), correct=FALSE) 


# Test if proportion replicated are different for original p<=.005 (replication success defined at alpha = .005) vs .005<orig_p<=.05 (replication success defined at alpha = .05).
prop.test(c(X2,X3), c(N2,N3), correct=FALSE) 


```

```{r}
#.0001<p<=.005
X7
N7
X7/N7

#p<.0001
X9
N9
X9/N9

# Prop test for difference in p(corr) between .0001<=p<.005 and .005<p<=.05
prop.test(c(X7,X3), c(N7,N3), correct=FALSE) 
```

```{r}
## FOR FIGURE 1

# Equal sized windows
currentpalette        <- brewer.pal(8,"Greys")
stepsize = 0.005
winsize = 0.005
raw_origP = RPPdata$T.pval.USE.O
length(raw_origP)
raw_replicP = RPPdata$T.pval.USE.R

# Calculate binomial confidence intervals
#install.packages('prevalence')
#install.packages('rjags')
# You will need to install JAGS. Here are the files for MacOS: https://sourceforge.net/projects/mcmc-jags/files/JAGS/4.x/Mac%20OS%20X/
library(rjags)
library(prevalence)

# Test if proportion replicated (at alpha = .05) are different for original p<=.005 vs .005<orig_p<=.05.
props <- propCI(x = c(X1,X2), n = c(N1,N2)) 
jeffCIs = props[props$method=='jeffreys',c('lower','upper')]
library(MASS)
prop.test(c(X1,X2), c(N1,N2), correct=FALSE) 


# Test if proportion replicated are different for original p<=.005 (replication success defined at alpha = .005) vs .005<orig_p<=.05 (replication success defined at alpha = .05).
prop.test(c(X3,X2), c(N3,N2), correct=FALSE) 

#length(origP)
id = which(raw_origP < 0.05)
origP = raw_origP[id]
replicP = raw_replicP[id]
length(origP)
startwins = seq(0,0.05-winsize,by=stepsize)
winProps <- 0
Nsamp  <- 0
i=0
for (startw in startwins){
  i=i+1
  endw = startw + winsize
  inds = origP>=startw & origP<endw
  origPs = origP[inds]
  replicPs = replicP[inds]
  winProps[i] = sum(replicPs<0.05)/length(replicPs)
  Nsamp[i] = length(replicPs)
}

# 
# startwins = head(startwins,-1) # the last window has no data points and has NaN
# winProps = head(winProps,-1)
# Nsamp = head(Nsamp,-1)
props <- propCI(x = winProps*Nsamp, n = Nsamp) 
jeffCIs = props[props$method=='jeffreys',c('lower','upper')]

eqwin_df = data.frame(startwins, winProps, Nsamp, jeffCIs)
colnames(eqwin_df)[1] <- 'window start'
colnames(eqwin_df)[2] <- 'proportion replicated'
colnames(eqwin_df)[3] <- 'number of studies'



pdf("Figures/Fig1_propReplicated_eqwin005_step005.pdf",height=4,width=7)

g <- ggplot(eqwin_df,aes(x=startwins+0.0025,y=winProps))+ geom_point(aes(size=Nsamp)) +scale_x_continuous(breaks=c(0,.005,.01,.015,.02,.025,.03,.035,.04,.045,.05),limits=c(0,.05)) + 
  ggtitle("") + xlab("Original study p-value") + ylab("Proportion of studies replicated") + 
  ylim(c(0,1)) + geom_errorbar(aes(ymin=lower, ymax=upper), width=.5, size = .8,alpha=0.5) +
  geom_vline(aes(xintercept=0.05),linetype=2,color=mypalette[8]) + 
  geom_vline(aes(xintercept=0.005),linetype=2,color=mypalette[8]) + 
  scale_fill_discrete(name="number of studies") + mytheme
g + guides(color = F) + scale_size(name="number of studies") +
  theme(legend.position=c(.921, .5)) +  theme_bw(base_size = 12) + theme(panel.grid.minor.x=element_blank(),
           panel.grid.major.x=element_blank())
dev.off()

#geom_line(size=1,alpha=0.5)

png("Figures/Fig1_propReplicated_eqwin005_step005.png",height=4,width=7,units = "in",res=600)

g <- ggplot(eqwin_df,aes(x=startwins+0.0025,y=winProps))+ geom_point(aes(size=Nsamp)) +scale_x_continuous(breaks=c(0,.005,.01,.015,.02,.025,.03,.035,.04,.045,.05),limits=c(0,.05)) + 
  ggtitle("") + xlab("Original study p-value") + ylab("Proportion of studies replicated") + 
  ylim(c(0,1)) + geom_errorbar(aes(ymin=lower, ymax=upper), width=.5, size = .8,alpha=0.5) +
  geom_vline(aes(xintercept=0.05),linetype=2,color=mypalette[8]) + 
  geom_vline(aes(xintercept=0.005),linetype=2,color=mypalette[8]) + 
  scale_fill_discrete(name="number of studies") + mytheme
g + guides(color = F) + scale_size(name="number of studies") +
  theme(legend.position=c(.921, .5)) +  theme_bw(base_size = 12) + theme(panel.grid.minor.x=element_blank(),
           panel.grid.major.x=element_blank())
dev.off()

```

```{r}
# Spearman's correlation between original and replication p-values
raw_origP = RPPdata$T.pval.USE.O
length(raw_origP)
raw_replicP = RPPdata$T.pval.USE.R
#length(origP)
id = which(raw_origP < 0.05)
origP = raw_origP[id]
replicP = raw_replicP[id]


library(Hmisc)
#install.packages('Hmisc')
pmat = matrix(c(origP,replicP),nrow=length(origP),ncol=2)
df = data.frame(origP,replicP)
#df
rcorr(pmat, type="spearman")
raw_df = data.frame(raw_origP,raw_replicP)
raw_pmat = matrix(c(raw_origP,raw_replicP),nrow=length(raw_origP),ncol=2)
rcorr(raw_pmat, type="spearman")
rcorr(raw_pmat, type="pearson")

ggplot(df, aes(x=origP, y=replicP)) +
    geom_point(shape=1) +    # Use hollow circles
    geom_smooth(method=lm) +   # Add linear regression line 
                             #  (by default includes 95% confidence region)
    scale_x_continuous(breaks=c(0,.01,.05),limits=c(0,0.06)) 


# Spearman's correlation excluding p<.0001 (as I suspect those may be driving most of the observed correlation of .3 above)
raw_origP = RPPdata$T.pval.USE.O
raw_replicP = RPPdata$T.pval.USE.R
length(raw_origP)
id = which(raw_origP >= .0001)# & raw_origP < 0.05)
origP = raw_origP[id]
replicP = raw_replicP[id]

pmat = matrix(c(origP,replicP),nrow=length(origP),ncol=2)
df = data.frame(origP,replicP)
#df
rcorr(pmat, type="spearman")

ggplot(df, aes(x=origP, y=replicP)) +
    geom_point(shape=1) +    # Use hollow circles
    geom_smooth(method=lm) +   # Add linear regression line 
                             #  (by default includes 95% confidence region)
    scale_x_continuous(breaks=c(0,.01,.05),limits=c(0,0.06)) 
```

```{r}
# Windows of size .01 except for the first window (.005)
length(origP)
startwins = c(0,.005,.01, .02, .03, .04, .05)
midwins = c(.0025,.0075,.015,.025,.035,.045)
winProps <- 0
Nsamp  <- 0
i=0
for (startw in head(startwins,-1)){
  i=i+1
  endw = startwins[i+1]
  inds = origP>=startw & origP<endw
  origPs = origP[inds]
  replicPs = replicP[inds]
  winProps[i] = sum(replicPs<0.05)/length(replicPs)
  Nsamp[i] = length(replicPs)
}
props <- propCI(x = winProps*Nsamp, n = Nsamp) 
jeffCIs = props[props$method=='jeffreys',c('lower','upper')]

eqwin_df = data.frame(head(startwins,-1), winProps, Nsamp, jeffCIs)
colnames(eqwin_df)[1] <- 'window start'
colnames(eqwin_df)[2] <- 'proportion replicated'
colnames(eqwin_df)[3] <- 'sample size'

g <- ggplot(eqwin_df,aes(x=midwins,y=winProps))+ geom_point(aes(size=Nsamp)) +scale_x_continuous(breaks=c(0,.005,.01,.015,.02,.025,.03,.035,.04,.045,.05),limits=c(0,.05)) + 
  ggtitle("") + xlab("Original study p-value") + ylab("Proportion of studies replicated") + 
  ylim(c(0,1)) + geom_errorbar(aes(ymin=lower, ymax=upper), width=.5, size = .8,alpha=0.5) +
  geom_vline(aes(xintercept=0.05),linetype=2,color=mypalette[8]) + 
  geom_vline(aes(xintercept=0.005),linetype=2,color=mypalette[8]) + 
  scale_fill_discrete(name="sample size") + mytheme
g + guides(color = F) + scale_size(name="sample size") +
  theme(legend.position=c(.921, .5)) +  theme_bw(base_size = 12) + theme(panel.grid.minor.x=element_blank(),
           panel.grid.major.x=element_blank())
```

```{r}
## Alpha 0.005, original studies with p<0.005
# Equal sized windows
currentpalette        <- brewer.pal(8,"Greys")
stepsize = 0.0005
winsize = 0.001
raw_origP = RPPdata$T.pval.USE.O
length(raw_origP)
raw_replicP = RPPdata$T.pval.USE.R
#length(origP)
id = which(raw_origP < 0.005)
origP = raw_origP[id]
replicP = raw_replicP[id]
length(origP)
startwins = seq(0,0.005-stepsize,by=stepsize)
winProps <- 0
Nsamp  <- 0
i=0
for (startw in startwins){
  i=i+1
  endw = startw + winsize
  inds = origP>=startw & origP<endw
  origPs = origP[inds]
  replicPs = replicP[inds]
  winProps[i] = sum(replicPs<0.005)/length(replicPs)
  Nsamp[i] = length(replicPs)
}

eqwin_df = data.frame(startwins, winProps, Nsamp)
colnames(eqwin_df)[1] <- 'window start'
colnames(eqwin_df)[2] <- 'proportion replicated'
colnames(eqwin_df)[3] <- 'sample size'


pdf("propReplicated_eqwin001_step0005.pdf")

g <- ggplot(eqwin_df,aes(x=startwins,y=winProps))+ geom_point(aes(size=Nsamp),color = currentpalette[6],alpha=.8) +
  geom_line()+scale_x_continuous(breaks=c(0,.001,.005),limits=c(0,.005)) + 
  ggtitle("") + xlab("Original study p-value") + ylab("Proportion of studies replicated") + 
  ylim(c(0,1)) + 
  geom_vline(aes(xintercept=0.005),linetype=2,color=mypalette[8]) + 
  geom_vline(aes(xintercept=0.0005),linetype=2,color=mypalette[8]) + 
  scale_fill_discrete(name="sample size") + mytheme
g + guides(color = F) + scale_size(name="sample size") +
  theme(legend.position=c(.921, .5))
dev.off()

winProps*Nsamp
Nsamp
```

```{r}
## PREVIOUS VERSION OF FIGURE 1, UNUSED
# Equal sized windows
currentpalette        <- brewer.pal(8,"Greys")
stepsize = 0.005
winsize = 0.01
#length(origP)
id = which(raw_origP < 0.06)
origP = raw_origP[id]
replicP = raw_replicP[id]
length(origP)
startwins = seq(0,0.06-winsize,by=stepsize)
winProps <- 0
Nsamp  <- 0
i=0
for (startw in startwins){
  i=i+1
  endw = startw + winsize
  inds = origP>=startw & origP<endw
  origPs = origP[inds]
  replicPs = replicP[inds]
  winProps[i] = sum(replicPs<0.05)/length(replicPs)
  Nsamp[i] = length(replicPs)
}


props <- propCI(x = winProps*Nsamp, n = Nsamp) 
jeffCIs = props[props$method=='jeffreys',c('lower','upper')]



eqwin_df = data.frame(startwins, winProps, Nsamp, jeffCIs)
colnames(eqwin_df)[1] <- 'window start'
colnames(eqwin_df)[2] <- 'proportion replicated'
colnames(eqwin_df)[3] <- 'sample size'
## THIS IS NOT USED ##
pdf("Figures/propReplicated_eqwin01_step005.pdf",height=4,width=6)

g <- ggplot(eqwin_df,aes(x=startwins,y=winProps))+ geom_point(aes(size=Nsamp),color = currentpalette[6],alpha=.8) +
  geom_line(size=1,alpha=0.5)+scale_x_continuous(breaks=c(0,.01,.02,.03,.04,.05),limits=c(0,.05)) + 
  ggtitle("") + xlab("Original study p-value") + ylab("Proportion of studies replicated") + 
  ylim(c(0,1)) + geom_errorbar(aes(ymin=lower, ymax=upper), width=.5, size = .8) +
  geom_vline(aes(xintercept=0.05),linetype=2,color=mypalette[8]) + 
  geom_vline(aes(xintercept=0.005),linetype=2,color=mypalette[8]) + 
  scale_fill_discrete(name="sample size") + mytheme
g + guides(color = F) + scale_size(name="sample size") +
  theme(legend.position=c(.921, .5)) +  theme_bw(base_size = 12) + theme(panel.grid.minor.x=element_blank(),
           panel.grid.major.x=element_blank())
dev.off()


png("Figures/propReplicated_eqwin01_step005.png",height=4,width=6,units = "in",res=300)

g <- ggplot(eqwin_df,aes(x=startwins,y=winProps))+ geom_point(aes(size=Nsamp),color = currentpalette[6],alpha=.8) +
  geom_line(size=1,alpha=0.5)+scale_x_continuous(breaks=c(0,.01,.02,.03,.04,.05),limits=c(0,.05)) + 
  ggtitle("") + xlab("Original study p-value") + ylab("Proportion of studies replicated") + 
  ylim(c(0,1)) + geom_errorbar(aes(ymin=lower, ymax=upper), width=.5, size = .8) +
  geom_vline(aes(xintercept=0.05),linetype=2,color=mypalette[8]) + 
  geom_vline(aes(xintercept=0.005),linetype=2,color=mypalette[8]) + 
  scale_fill_discrete(name="sample size") + mytheme
g + guides(color = F) + scale_size(name="sample size") +
  theme(legend.position=c(.921, .5)) +  theme_bw(base_size = 12) + theme(panel.grid.minor.x=element_blank(),
           panel.grid.major.x=element_blank())
dev.off()
```

```{r}
# Sampe size - matched windows
stepsize = 0.005
sampSize = 10 # use windows with 20 samples
raw_origP = RPPdata$T.pval.USE.O
length(raw_origP)
raw_replicP = RPPdata$T.pval.USE.R
#length(origP)
id = which(raw_origP < 0.06)
origP = raw_origP[id]
replicP = raw_replicP[id]
length(origP)
startwins = seq(0,0.06-stepsize,by=stepsize)
winProps <- 0
i=0

sortedOrigPInd = sort(origP,index.return = TRUE)
sortedOrigP = sortedOrigPInd$x
sortedInds = sortedOrigPInd$ix
sortedReplicP = replicP[sortedInds]
for (startw in startwins){
  i=i+1
  # in the sorted origP, start closest to this position, and get origP value sampSize positions from the current position
  # that is the end point of the window
  ind = which.min(abs(sortedOrigP - startw)) 
  endw = sortedOrigP[ind+sampSize]
  
  inds = origP>=startw & origP<endw
  origPs = origP[inds]
  replicPs = replicP[inds]
  winProps[i] = sum(replicPs<0.05)/length(replicPs)
}

eqwin_df = data.frame(startwins, winProps)


pdf("propReplicated_sizematchedwin_N20_step005.pdf")

ggplot(eqwin_df,aes(x=startwins,y=winProps))+ geom_point() +
  geom_line()+scale_x_continuous(breaks=c(0,.01,.05),limits=c(0,.06)) + 
  ggtitle("") + xlab("Original Study p-value") + ylab("Proportion of studies replicated") + 
  ylim(c(0,1)) + 
  geom_vline(aes(xintercept=0.05),linetype=2,color=mypalette[9]) + 
  geom_vline(aes(xintercept=0.005),linetype=2,color=mypalette[9]) + 
  mytheme
dev.off()
```


```{r}
# Checking veracity of statement: "26 of 63 (41%) original studies
# with P < 0.02 achieved P < 0.05 in the replication"
ids = which(origP<0.02)
length(ids)
repP = replicP[ids]
length(which(repP<0.05))
# my ans: 26/64

# Checking veracity of statement: "6 of 23 (26%) that had a P value between 0.02 < P < 0.04 achieved P < 0.05 in the replication"
ids = which(origP<0.04 & origP>0.02)
length(ids)
repP = replicP[ids]
length(which(repP<0.05))
# my ans: 7/23

# Checking veracity of statement: "2 of 11 (18%) that had
# a P value > 0.04  achieved P < 0.05 in the replication"
ids = which(origP>0.04)
length(ids)
repP = replicP[ids]
length(which(repP<0.05))
# my ans: 1/5 if not considering original p > 0.05 and 1/9 if considering 0.05 < p < 0.06


```



```{r}
# The appropriate control analysis is to look at p<0.005 and determine how many replicated at p<0.005
# This controls for boundary effects near the threshold (because obviously, there is a distribution of expected p values around the original p value assuming that there is a correlation (which there does seem to be, see Table 2, Science paper)).
# Also looked at proportion of studies replicated when original p value was between 0.0001 and 0.001. 50% at best (alpha = 0.05) replicated.

# Equal sized windows
stepsize = 0.001
winsize = 0.001
raw_origP = RPPdata$T.pval.USE.O
length(raw_origP)
raw_replicP = RPPdata$T.pval.USE.R
#length(origP)
minP = 0.0001
maxP = 0.006
id = which(raw_origP> minP & raw_origP < maxP)
origP = raw_origP[id]
replicP = raw_replicP[id]
length(origP)
startwins = seq(minP,maxP-stepsize,by=stepsize)
winProps <- 0
i=0
for (startw in startwins){
  i=i+1
  endw = startw + winsize
  inds = origP>=startw & origP<endw
  origPs = origP[inds]
  replicPs = replicP[inds]
  winProps[i] = sum(replicPs<0.005)/length(replicPs)
}

eqwin_df = data.frame(startwins, winProps)


pdf("propReplicatedat005_eqwin001_step0005.pdf")

ggplot(eqwin_df,aes(x=startwins,y=winProps))+ geom_point() +
  geom_line()+scale_x_continuous(breaks=c(0,.0001,.005),limits=c(0,.006)) + 
  ggtitle("") + xlab("Original Study p-value") + ylab("Proportion of studies replicated") + 
  ylim(c(0,1)) + 
  geom_vline(aes(xintercept=0.005),linetype=2,color=mypalette[9]) + 
  geom_vline(aes(xintercept=0.001),linetype=2,color=mypalette[9]) + 
  mytheme
dev.off()


#Scatter
pmat = matrix(c(origP,replicP),nrow=length(origP),ncol=2)
df = data.frame(origP,replicP)
#df
rcorr(pmat, type="spearman")
raw_df = data.frame(raw_origP,raw_replicP)
raw_pmat = matrix(c(raw_origP,raw_replicP),nrow=length(raw_origP),ncol=2)
rcorr(raw_pmat, type="spearman")

ggplot(df, aes(x=origP, y=replicP)) +
    geom_point(shape=1) +    # Use hollow circles
    geom_smooth(method=lm) +   # Add linear regression line 
                             #  (by default includes 95% confidence region)
    scale_x_continuous(breaks=c(minP,.001,maxP),limits=c(minP,maxP)) +
    geom_vline(aes(xintercept=0.005),linetype=2,color=mypalette[9]) + 
    geom_hline(aes(yintercept=0.005),linetype=2,color=mypalette[9]) + 
    mytheme


# Proportion studies replicated in original p value band 0.0001 to 0.001
id = which(raw_origP> 0.0001 & raw_origP <= 0.001)
origP = raw_origP[id]
replicP = raw_replicP[id]
sum(replicP<0.05)/length(origP) 

# Scatter
ggplot(df, aes(x=origP, y=replicP)) +
    geom_point(shape=1) +    # Use hollow circles
    geom_smooth(method=lm) +   # Add linear regression line 
                             #  (by default includes 95% confidence region)
    scale_x_continuous(breaks=c(minP,.001,0.001),limits=c(minP,0.001)) +
    geom_vline(aes(xintercept=0.005),linetype=2,color=mypalette[9]) + 
    geom_hline(aes(yintercept=0.05),linetype=2,color=mypalette[9]) + 
    mytheme

# Proportion studies replicated when original p value <= 0.005
id = which(raw_origP <= 0.005)
origP = raw_origP[id]
replicP = raw_replicP[id]
1-sum(replicP<0.005)/length(origP) 

# Proportion studies replicated in original p value band 0.005 to 0.05
id = which(raw_origP> 0.005 & raw_origP <= 0.05)
origP = raw_origP[id]
replicP = raw_replicP[id]
1-sum(replicP<0.05)/length(origP) 

```

# Animation by window sizes
```{r}
plot_win <- function(winsize, stepsize) {
  id = which(raw_origP < 0.05)
  origP = raw_origP[id]
  replicP = raw_replicP[id]
  length(origP)
  startwins = seq(0,0.05-winsize,by=stepsize)
  winProps <- 0
  Nsamp  <- 0
  i=0
  for (startw in startwins){
    i=i+1
    endw = startw + winsize
    inds = origP>=startw & origP<endw
    origPs = origP[inds]
    replicPs = replicP[inds]
    winProps[i] = sum(replicPs<0.05)/length(replicPs)
    Nsamp[i] = length(replicPs)
  }
  
  # 
  # startwins = head(startwins,-1) # the last window has no data points and has NaN
  # winProps = head(winProps,-1)
  # Nsamp = head(Nsamp,-1)
  props <- propCI(x = winProps*Nsamp, n = Nsamp) 
  jeffCIs = props[props$method=='jeffreys',c('lower','upper')]
  
  eqwin_df = data.frame(startwins, winProps, Nsamp, jeffCIs)
  colnames(eqwin_df)[1] <- 'window start'
  colnames(eqwin_df)[2] <- 'proportion replicated'
  colnames(eqwin_df)[3] <- 'sample size'
  
  
  
  g <- ggplot(eqwin_df,aes(x=startwins,y=winProps))+ 
    geom_vline(aes(xintercept=0.05),linetype=2,color=mypalette[8]) + 
    geom_vline(aes(xintercept=0.005),linetype=2,color=mypalette[8]) + 
    geom_point(aes(size=Nsamp),color = currentpalette[6],alpha=.8) +
    geom_line(size=1,alpha=0.5)+scale_x_continuous(breaks=c(0,.01,.02,.03,.04,.05),limits=c(0,.05)) + 
    ggtitle("") + xlab(paste0("Original study p-value (window size:", winsize, ")" )) + ylab("Proportion of studies replicated") + 
    ylim(c(0,1)) + geom_errorbar(aes(ymin=lower, ymax=upper), width=.5, size = .8) +
    scale_fill_discrete(name="sample size") + mytheme
  g + guides(color = F) + scale_size(name="sample size") +
    theme(legend.position=c(.921, .5)) +  theme_bw(base_size = 12) + theme(panel.grid.minor.x=element_blank(),
                                                                           panel.grid.major.x=element_blank())
}


library(magick)
img <- image_graph(800, 200, res = 72)
for (i in seq(.005, .010, .001)) {
  p <- plot_win(winsize = i, stepsize = .001)  
  print(p)
}
dontcare <- dev.off()
animation <- image_animate(img, fps = 2)
print(animation)

image_write(animation, "Figures/Fig2-animated.gif")
```


When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the *Preview* button or press *Cmd+Shift+K* to preview the HTML file).
